Packages (install these first if you haven't already)
library(tidyverse)
library(tidytext)
library(ggrepel)
library(plotly)
Data read-in
# Read the raw survey responses. `col_names = FALSE` keeps the first line of
# the file as data (it holds the question text, see the printed tibble) and
# lets readr name the columns X1, X2, X3.
words <- read_csv("data/sentiment-data.csv", col_names = FALSE)
# Print the tibble for a quick visual check.
words
## # A tibble: 127 × 3
## X1 X2 X3
## <chr> <chr> <chr>
## 1 Write down four words to describe how you're feeling about this … Writ… Writ…
## 2 Excited, Hopeful, Prepared, good Ready Info…
## 3 Unsure, confused, anxious, curious apat… Exci…
## 4 Co operations, Teamwork, communication, critical thinking Team… I wi…
## 5 a a a
## 6 First, team work, nervous, curious Nerv… New
## 7 Interesting. New. Exciting. Develop Inte… Exci…
## 8 perplexed,anxious,embarrassed,bit excited hope… resp…
## 9 Novel, Unknown, Challenging, Useful Nerv… accu…
## 10 Worried, excited, self-doubt, motivated Nerv… Hope…
## # … with 117 more rows
Data Cleaning
# complicated data cleaning for first question :(
# Turns the free-text answers in column X1 into a one-column tibble `word`
# with one cleaned word/short phrase per row. The steps are order-dependent.
col1_words <- words %>%
# Drop the first row, which holds the question text rather than an answer.
slice(-1) %>%
# Keep only the first question's column.
select(X1) %>%
# Split each answer into pieces on: comma, ideographic comma, literal asterisk,
# or a period followed by a space. The printed result shows the pieces
# lower-cased.
# NOTE(review): the plain comma appears twice in the pattern; the last
# alternative may originally have been a full-width comma -- confirm.
unnest_tokens(output = word, input = X1, token = str_split, pattern = ',|、|\\*|\\. |,') %>%
# Strip leading/trailing whitespace left over from the split.
mutate(word = str_trim(word)) %>%
# Keep pieces with fewer than 4 spaces and more than 2 characters.
filter(str_count(word, '\\ ') < 4 & str_length(word) > 2) %>%
# Pieces with exactly 3 spaces (four space-separated words) are split into
# individual words; this makes `word` a list-column, flattened by unnest().
mutate(word = ifelse(str_count(word, '\\ ') == 3, str_split(word, '\\ '), word)) %>%
unnest(word) %>%
# Remove the first match of: any 8 characters followed by ")", a trailing
# " to", a leading "be ", a leading "bit ", or any run of 18 characters.
# NOTE(review): the `.{8}\\)` and `.{18}` alternatives look tuned to specific
# responses in this dataset -- confirm before reusing on other data.
# Then split on "/" so slash-separated alternatives become separate entries.
mutate(word = str_remove(word, '.{8}\\)|\\ to$|^be\\ |^bit\\ |.{18}'),
word = str_split(word, '\\/')) %>%
unnest(word) %>%
# Drop entries that ended up empty after the removals above.
filter(str_length(word) > 0)
# Print the cleaned words.
col1_words
## # A tibble: 459 × 1
## word
## <chr>
## 1 excited
## 2 hopeful
## 3 prepared
## 4 good
## 5 unsure
## 6 confused
## 7 anxious
## 8 curious
## 9 co operations
## 10 teamwork
## # … with 449 more rows
Top 20 most frequent words in question one, with a visualization
# Tally each cleaned word and keep the 20 most frequent, most common first.
top_20 <- col1_words %>%
  count(word, sort = TRUE) %>%
  head(20)
# Print the frequency table.
top_20
## # A tibble: 20 × 2
## word n
## <chr> <int>
## 1 nervous 59
## 2 excited 55
## 3 curious 34
## 4 anxious 14
## 5 scared 12
## 6 worried 12
## 7 interested 11
## 8 interesting 11
## 9 hopeful 10
## 10 difficult 9
## 11 challenging 6
## 12 confused 6
## 13 unknown 6
## 14 apprehensive 5
## 15 exciting 5
## 16 happy 5
## 17 stressed 5
## 18 teamwork 5
## 19 motivated 4
## 20 uncertain 4
# Horizontal bar chart of the 20 most frequent words. Each bar is labelled
# with its word via geom_text(), so the y-axis text and ticks are hidden.
top_20 %>%
  # Order the factor levels by count so the bars are sorted by frequency.
  mutate(word = fct_reorder(word, n)) %>%
  ggplot(aes(n, word, fill = word)) +
  geom_col(show.legend = FALSE, width = .5) +
  scale_fill_viridis_d() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_text(angle = 0, vjust = .5, size = rel(1.2))
  ) +
  xlab("frequency") +
  # Extend the x-axis past the largest count so the text labels drawn to the
  # right of each bar are not clipped.
  xlim(c(0, 70)) +
  # Negative hjust pushes each label to the right of its bar end.
  geom_text(aes(label = word), hjust = -.5)

Sentiment word comparison
# Diverging bar chart of the most frequent positive and negative words:
# negative words extend to the left (negated counts), positive to the right.
p <- col1_words %>%
  count(word) %>%
  # Name the lexicon and the join key explicitly: "bing" is the lexicon
  # get_sentiments() uses by default, and `by = "word"` avoids the implicit
  # natural-join message. Words absent from the lexicon are dropped.
  inner_join(get_sentiments("bing"), by = "word") %>%
  # Top 9 words by count within each sentiment (slice_max keeps ties by
  # default, so a group may contain more than 9 rows).
  group_by(sentiment) %>%
  slice_max(n, n = 9) %>%
  ungroup() %>%
  mutate(
    # Flip the sign of negative-sentiment counts so they plot to the left.
    n = ifelse(sentiment == "negative", -n, n),
    # Order words by the signed count so the bars are sorted.
    word = fct_reorder(word, n)
  ) %>%
  ggplot() +
  geom_col(aes(n, word, fill = sentiment)) +
  labs(x = "Contribution to sentiment")
# Render the ggplot as an interactive plotly widget.
ggplotly(p)
# Word-cloud style display of sentiment words, one panel per sentiment, with
# text size mapped to how often the word occurs.
col1_words %>%
  count(word) %>%
  # Explicit lexicon ("bing" is the get_sentiments() default) and join key;
  # words absent from the lexicon are dropped.
  inner_join(get_sentiments("bing"), by = "word") %>%
  # Every label is anchored at the same point (0, 0); geom_text_repel() then
  # pushes the labels apart to form the cloud.
  ggplot(aes(0, 0)) +
  geom_text_repel(
    aes(label = word, size = n, colour = sentiment),
    # No pull back towards the anchor, never drop overlapping labels, and no
    # connector segments; the fixed seed makes the layout reproducible.
    force_pull = 0, max.overlaps = Inf,
    segment.color = NA, point.padding = NA, seed = 399, show.legend = FALSE
  ) +
  facet_grid(~ sentiment) +
  theme_bw() +
  # The axes carry no meaning here, so hide their text, ticks and titles.
  theme(axis.text = element_blank(), axis.ticks = element_blank()) +
  labs(x = "", y = "")
